Conversation
| "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=${CATALOG_NAME}" | ||
| echo | ||
| echo "Catalog created" | ||
| cat << EOF | ${SPARK_HOME}/bin/spark-sql -S \ |
There was a problem hiding this comment.
Would it be possible to run this test as an integration test under JUnit inside the Gradle build?
There was a problem hiding this comment.
Hi @dimas-b, the purpose of this regression test is to validate the end-to-end user experience when using Spark with both --packages and --jars. This is an important scenario that cannot be fully covered by integration tests.
While it is true that this test is relatively expensive, that is why it includes only very basic test cases. More complex scenarios and edge cases are covered by integration tests, which provide a more cost-effective approach.
There was a problem hiding this comment.
I believe Spark-based scenarios can be covered in regular CI with sufficient certainty... but I do not mean to block this PR on this point 🙂 Please consider optional.
plugins/spark/v3.5/regtests/run.sh
Outdated
| # Define test suites to run | ||
| # Each suite specifies: test_file:table_format:test_shortname | ||
| declare -a TEST_SUITES=( | ||
| "spark_sql.sh:delta:spark_sql" |
There was a problem hiding this comment.
How about we enforce a test file naming format like xxx_<table_format>.sh, and keep all test source files and reference files in a separate folder? Then we only need to list that folder to discover all test files, and we can extract the table format by parsing each file name. The benefits would be easier onboarding of new tests, and developers would not have to type a long string when running a single test (just the file name).
plugins/spark/v3.5/regtests/setup.sh
Outdated
| # this is mostly useful for building the Docker image with all needed dependencies | ||
| ${SPARK_HOME}/bin/spark-sql -e "SELECT 1" | ||
| if [[ "$TABLE_FORMAT" == "hudi" ]]; then | ||
| # For Hudi: Pass --packages on command line to match official Hudi docs approach |
There was a problem hiding this comment.
I don't think we need the if/else here anymore.
| rm -rf /tmp/spark_hudi_catalog/ | ||
|
|
||
| curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ | ||
| http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME} > /dev/stderr No newline at end of file |
| exit 1 | ||
| fi | ||
|
|
||
| parse_test_suite() { |
There was a problem hiding this comment.
Can we add a comment here about what this function is doing? It is trying to extract TABLE_FORMAT, TEST_SHORTNAME, and the full path of TEST_FILE, right?
| exit 1 | ||
| fi | ||
|
|
||
| # Allow running specific test via environment variable |
There was a problem hiding this comment.
I think we can potentially also allow running all suites for a particular format by taking table format as an argument to this script. We can probably do that in a separate PR as an improvement.
There was a problem hiding this comment.
Let's do it in another PR.
Summary
cc @gh-yzou @singhpk234 @flyrain
Checklist
CHANGELOG.md (if needed); site/content/in-dev/unreleased (if needed)